#==================================================================================== # «COLOR-ENHANCED TIME PRESSURE ENVELOPE» (CE-TPE) # # Cesare Brizio, 10 July 2023 - PROOF OF CONCEPT - NOT A VIABLE PIECE OF SOFTWARE!!! # # Released under CC BY-SA 4.0 License # (https://creativecommons.org/licenses/by-sa/4.0/deed.en) # #============================================================================================ # GENERATE A TIME PRESSURE ENVELOPE (TPE) WHERE THE LINES CORRESPONDING TO A SET OF AUDIO # SAMPLES CONTAINING A SPECIFIC FREQUENCY ABOVE A GIVEN SOUND PRESSURE ARE COLORED # DIFFERENTLY (RED) THAN THE DEFAULT TPE COLOR (BLACK) #============================================================================================= # #========================================================== # !!!!!!!!!!!!!!!!!! CAUTION !!!!!!!!!!!!!!!!!!!!!!!!!! # THIS PROTOTYPE WAS MADE TO WORK JUST WITH THE 1-CHANNEL # EXAMPLE WAV FILE C:\CE-TPE_Python\CETPE_Audio_Sample.wav # AND MAY NOT NECESSARILY WORK WITH STEREO WAV FILES! #=========================================================== # #=============================================================================================== # I DON'T KNOW PYTHON WELL ENOUGH TO START FROM SCRATCH. CONSEQUENTLY, I'M COMPELLED # TO USE STANDARD PYTHON LIBRARIES. FUNCTIONS/METHODS ARE MOSTLY INVOKED WITH DEFAULT # PARAMETERS, E.G., I DIDN'T ATTEMPT TO OPTIMIZE THE WINDOWING FUNCTION AND THE FFT SIZE OF # signal.spectrogram(). FOR THE ACTUAL «COLOR-ENHANCED TIME PRESSURE ENVELOPE», # THAT SHOULD BE FULLY CONTROLLED BY USER-SET PARAMETERS, EXTENSIVE REDESIGN OF THIS # PROOF OF CONCEPT IS REQUIRED. # ------------------------------------ # THE OVERSIMPLIFIED STEPS FOR CE-TPE GENERATION AS PROPOSED HERE INCLUDE: # 1) GENERATING AN ORDINARY TPE FROM THE INPUT FILE # 2) GENERATING THE CORRESPONDING TIME/FREQUENCY SPECTROGRAM (THAT WILL BE USED TO INVESTIGATE # THE PRESENCE AND THE INTENSITY OF THE INTERESTING FREQUENCY) # 3) ASKING USER INPUT FOR INTERESTING FREQUENCY AND THRESHOLD PRESSURE LEVEL # 4) GENERATING AN EMPTY SUBPLOT ("ax3") AS BIG AS THE ORIGINAL TIME PRESSURE ENVELOPE (TPE) # 4) REGENERATING IN ax3 THE SAME TPE AS IN ax1 # 5) SEARCHING THE "spgram" NUMPY ARRAY FOR VALUES ABOVE THRESHOLD IN THE SPECIFIC # FREQUENCY BAND. # 6) OVERWRITING IN RED THE TPE LINES IN ax3 THAT MEET THE CONDITION # # VERY OBVIOUSLY, CONSIDERING THAT THE CE-TPE IS A SPECTROGRAM UNDER DISGUISE, THE RESOLUTION # OF THE OVERWRITING DOESN'T MATCH THE ORIGINAL TIME-PRESSURE ENVELOPE'S, AND CANNOT EXCEED # THE TIME RESOLUTION OF THE SPECTROGRAM. THE DIFFERENCE BECOMES EVIDENT ONLY WHEN ZOOMING, IF # THE ON-SCREEN RESOLUTION OF THE X-AXIS IN ax3 EXCEEDS THE NUMBER OF COLUMNS IN THE # SPECTROGRAM. #============================================================================================= import numpy as np import matplotlib.pyplot as plt from matplotlib.gridspec import GridSpec from scipy.io import wavfile from scipy import signal from pydub import AudioSegment from mpl_toolkits.axes_grid1.inset_locator import inset_axes import winsound # needed just to play directly from the Wav file import sounddevice as sd # needed to play sound from a numpy array from matplotlib.widgets import Button import PySimpleGUI as sg #======================================================================= # Version 1.0 - 10 July 2023 - Initial version # Version 2.0 - 13 July 2023 - Added buttons to play or stop the sound # from the numpy array where the wavefile # data are stored # Version 3.0 - 13 July 2023 - Zoom on the top subplot regenerates # the other subplots # Version 3.0 - 13 July 2023 - Play restricted to the zoomed section # Version 4.0 - 14 July 2023 - Ask parameters: frequency and threshold # value. Display interesting frequency on # spectrogram. Check the maximum sound # pressure in the interesting spectrogram # row and display in prompt for threshold. #======================================================================= version = 'Version 4.0' #Import the .wav audio f = 'C:\CE-TPE_Python\CETPE_Audio_Sample.wav' #s = sampling frequency (int) #a = audio signal (numpy array) s,a = wavfile.read(f) frames = len(a) duration = frames / float(s) fps = round(frames / duration) print('Sampling Rate:',s) print('Audio Shape:',np.shape(a)) print('Frames:',frames) print('Duration:',duration) print('Frames per second:',fps) #one figure, size 18x9, with 3 vertically stacked subplots plt.rcParams['figure.dpi'] = 100 fig = plt.figure(figsize=(18,9)) fig.canvas.manager.set_window_title('Color-Enhanced Time-Pressure Envelope '+version) #height_ratios must match the number of rows #width_ratios must match the number of columns gs = GridSpec(ncols=1, nrows=3, height_ratios=[1, 2, 1]) ax1 = fig.add_subplot(gs[0]) ax2 = fig.add_subplot(gs[1], sharex=ax1) ax3 = fig.add_subplot(gs[2], sharex=ax1) #Display monophonic signal na = a.shape[0] #na = number of samples #s = sampling frequency (int) #la = number of samples / sampling frequency la = na / s #t = array of timestamps t = np.linspace(0,la,na) #============================== #a is the array with AMPLITUDES #t contains just the timestamps #============================== xlim_left_ax1=0 xlim_right_ax1=duration ax1.set_xlim(xlim_left_ax1, xlim_right_ax1) ax1.plot(t,a,color='purple') ax1.set_title('Time-Pressure Envelope (TPE) - Apply rectangle-zoom here!', fontweight="bold") ax1.set(ylabel='Sample values') #=================== #start added 13 July #=================== # Declare and register callbacks def re_zoom(event): global xlim_left_ax1 #GLOBAL needed because it will be updated by re_zoom(event) global xlim_right_ax1 #GLOBAL needed because it will be updated by re_zoom(event) xlim_left_ax1,xlim_right_ax1=ax1.get_xlim() print("updated left xlim of ax1: ", xlim_left_ax1) print("updated right xlim of ax1: ", xlim_right_ax1) return # End re_zoom() ax1.callbacks.connect('xlim_changed',re_zoom) # for rectangle-select zoom #=================== #end added 13 July #=================== print("Left xlim of ax1: ", xlim_left_ax1) print("Right xlim of ax1: ", xlim_right_ax1) #Display spectrogram #s = sampling frequency (int) #a = audio signal (numpy array) #Returns: #fr = array of sample frequencies #tm = array of segment times #spgram = Spectrogram of x (numpy array). By default, the last axis of spgram corresponds to the segment times. # contains fr lines and tm columns #See https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.spectrogram.html #for default parameters fr, tm, spgram = signal.spectrogram(a,s) #lspg = natural log of spgram (numpy array) lspg = np.log(spgram) MyColorMesh = ax2.pcolormesh(tm,fr,lspg,shading='auto') ax2.set(ylabel='Frequency (Hz)') #place a colorbar aside the spectrogram axins = inset_axes( ax2, width="2%", # width: 5% of parent_bbox width height="100%", # height: 50% loc="lower left", bbox_to_anchor=(1.05, 0., 1, 1), bbox_transform=ax2.transAxes, borderpad=0, ) cb = fig.colorbar(MyColorMesh, cax=axins, spacing='proportional') cb.ax.set_yscale('linear') # a contains as many subscripts, as the samples: 2250466 print("\n") print("---------------------------------") print("Dimensions of a = ", end=" ") print(np.shape(a)) # a and t are isomorph print("\n") print("Dimensions of t = ", end=" ") print(np.shape(t)) print("---------------------------------") print("\n") print("-------------------------------------") print("List a few values in a (example)") print("-------------------------------------") for sample_num in range(100000, 100020): print(round(a[sample_num],4), end=" ") print("\n") print("-------------------------------------") print("List more values in a (example)") print("-------------------------------------") for sample_num in range(1000000, 1000020): print(round(a[sample_num],4), end=" ") print("\n") print("-------------------------------------") print("List even more values in a (example)") print("-------------------------------------") for sample_num in range(2000000, 2000020): print(round(a[sample_num],4), end=" ") print("\n") print("---------------------------------") print("Dimensions of spgram = ", end=" ") print(np.shape(spgram)) print("---------------------------------") rows_spgram,columns_spgram=np.shape(spgram) spgram_columns_per_second = round(columns_spgram / duration) print('Rows in spgram:',rows_spgram) print('Columns in spgram:',columns_spgram) print('Columns per second in spgram:',spgram_columns_per_second) # As the default of signal.spectrogram resolution, with a WAV file # recorded at a sampling frequency of 250kHz (passing band is 0-125kHz) we get: # 129 frequency bands (rows), each row is 969kHz wide, # 10046 time frames (columns), each second contains 1116 columns # Different FFT sizes and windowing functions may change those values print("\n") print("-------------------------------------") print("List a few values in spgram (example)") print("-------------------------------------") for frequency_band in range(80, 100): print("\n") for time in range (8000, 8010): np.set_printoptions(precision=3) print(round(spgram[frequency_band, time],4), end=" ") #which is the interesting frequency? #The program was tested with 42000 event, values = sg.Window('CE-TPE Parameters, Step 1', [[sg.T('Interesting frequency in Hz (try 42000)'), sg.In(key='-FREQ-')], [sg.B('OK'), sg.B('Cancel') ]]).read(close=True) interesting_frequency = int(values['-FREQ-']) print('Interesting Frequency:',interesting_frequency) #write a line on ax2 to display the chosen frequency x_int_freq, y_int_freq = [0,duration],[interesting_frequency, interesting_frequency] ax2.plot(x_int_freq, y_int_freq, color='yellow') #set ax2 title to include the interesting frequency ax2.set_title('Time/Frequency Spectrographic Image (TFSI), yellow line = interesting frequency ('+str(interesting_frequency)+' Hz)', fontweight="bold") #================================================= #Calculate the interesting spectrogram row #================================================= # recorded_frequency_range = sampling frequency /2 recorded_frequency_range = s / 2 print('Recorded Frequency Range:',recorded_frequency_range) #number of frames in each spectrogram column frames_per_spgram_column = frames / columns_spgram print('Frames for each spgram column:',frames_per_spgram_column) #half number of frames in each spectrogram column half_frames_per_spgram_column = round(frames_per_spgram_column*0.5) #which row in spgram contains the interesting frequency? interesting_spgram_row = round((rows_spgram*interesting_frequency)/recorded_frequency_range) print('Interesting spgram row:',interesting_spgram_row) #To help the user choose an appropriate threshold, maximum sound pressure #in the interesting spectrogram row is collected and proposed in the #input prompt Max_sound_pressure_interesting_spectrogram_row = 0 for spgram_column in range(1, columns_spgram): if spgram[interesting_spgram_row,spgram_column] >= Max_sound_pressure_interesting_spectrogram_row: Max_sound_pressure_interesting_spectrogram_row=spgram[interesting_spgram_row,spgram_column] #which is the threshold pressure value above which the frequency is considered relevant? #The program was tested with 10000 event, values = sg.Window('CE-TPE Parameters, Threshold Pressure (spectrogram displays natural logs)', [[sg.T('Threshold Pressure (maximum sound pressure in row = '+str(Max_sound_pressure_interesting_spectrogram_row)+', try 10000)'), sg.In(key='-PRES-')], [sg.B('OK'), sg.B('Cancel') ]]).read(close=True) threshold_pressure_value = int(values['-PRES-']) #To which value does it correspond in the logaritmic spectrogram? natural_log_threshold_pressure_value = np.log(threshold_pressure_value) print('Threshold pressure value:',threshold_pressure_value) print('Natural log of threshold pressure value (as displayed in ax2):',natural_log_threshold_pressure_value) #In the third subplot I plot a copy of the original TPE, #then I overwrite with different colors the vertical #lines corresponding to samples in spgram that contain #a specific frequency above a given pressure. ax3.set_title('Color-Enhanced Time-Pressure Envelope (CE-TPE) - Highlighted signals contain the '+str(interesting_frequency)+'Hz band above '+str(threshold_pressure_value)+' ('+str(round(natural_log_threshold_pressure_value,4))+' in spectrogram log scale)', fontweight="bold") ax3.set(ylabel='Sample values') ax3.set(xlabel='Time (s)') ax3.set_xlim(0, duration) ax3.set_ylim(-32768, +32768) #regenerate the TPE ax3.plot(t,a,color='black') #x1 contains the X of the first and of the last point of the segment #y1 contains the y of the first and of the last point of the segment x1, y1 = [0, duration], [0, 0] ax3.plot(x1, y1, color='green', marker = 'o') #==================================================================================== # Scan the interesting spectrogram row (corresponding to the interesting frequency) # to check whether the sound pressure exceeds the threshold. In that case, fill the # array Found_in_spgram_columns[] with the index of the spgram column that fulfills # the requirements, and some ancillary arrays with other useful data. #===================================================================================== Found_in_spgram_columns = [] Found_at_t_time = [] Found_at_sample_number = [] A_min_sound_pressure_spgram_column = [] A_max_sound_pressure_spgram_column = [] Count_found_spgram_columns = 0 for spgram_column in range(1, columns_spgram): if spgram[interesting_spgram_row,spgram_column] >= threshold_pressure_value: Count_found_spgram_columns = Count_found_spgram_columns + 1 #save in Found_in_spgram_columns[] Found_in_spgram_columns.append(spgram_column) print('Interesting pressure above threshold found in spgram column: ',spgram_column) #show the value in the spgram cell of the interesting frequency row #that exceeds the threshold value print('Value in spgram cell:',spgram[interesting_spgram_row,spgram_column]) #remap the column index to a specific time in ax3 time_found = spgram_column / spgram_columns_per_second #save in Found_at_t_time[] Found_at_t_time.append(time_found) print('Corresponding with TPE time: ',time_found) #find which sample in numpy array t corresponds to the time sample_number = round(time_found*fps) #save in Found_at_sample_number[] Found_at_sample_number.append(sample_number) print('Corresponding with t sample : ',sample_number) #Depending from the FFT size, any spgram column corresponds to #a set of frames_per_spgram_column frames. #I calculate the maximum and the average value of the frames_per_spgram_column #columns in a, that correspond to the current spgram column. That way, i get the #correct length of the line to draw on the TPE in ax3, to cover in red the #existing line in that position. min_a_sound_pressure_in_range = 0 max_a_sound_pressure_in_range = 0 initial_a_index=round(sample_number-half_frames_per_spgram_column) final_a_index=round(sample_number+half_frames_per_spgram_column) print('Initial a index: ',initial_a_index) print('Final a index: ',final_a_index) for a_column in range(initial_a_index, final_a_index): if a[a_column]<min_a_sound_pressure_in_range: min_a_sound_pressure_in_range = a[a_column] if a[a_column]>max_a_sound_pressure_in_range: max_a_sound_pressure_in_range = a[a_column] #save in A_min_sound_pressure_spgram_column[] A_min_sound_pressure_spgram_column.append(min_a_sound_pressure_in_range) print('Minimum a sound pressure in range: ',min_a_sound_pressure_in_range) #save in A_min_sound_pressure_spgram_column[] A_max_sound_pressure_spgram_column.append(max_a_sound_pressure_in_range) print('Maximum a sound pressure in range: ',max_a_sound_pressure_in_range) if Count_found_spgram_columns > 0: #scan the Found_at_t_time[] array #overwrite with a red line the TPE in ax3 at the position stored in Found_at_t_time[] for i in range (1,Count_found_spgram_columns): overwrite_TPE_at_time = Found_at_t_time[i] min_line_to_draw = A_min_sound_pressure_spgram_column[i] max_line_to_draw = A_max_sound_pressure_spgram_column[i] #overwrite the TPE in ax3 with a line xfound, yfound = [overwrite_TPE_at_time, overwrite_TPE_at_time], [min_line_to_draw,max_line_to_draw] ax3.plot(xfound, yfound, color='red') plt.subplots_adjust(left=0.08, bottom=0.09, right=0.9, top=0.975, wspace=0.15, hspace=0.2) #=================== #start added 13 July #=================== class PlayPlottedWave: def start_playing(event): #if we play the unzoomed, full-girth audio file, we could #simply use the a numpy array. Start sample = 0 and #end sample = frames. But if we have zoomed, #I would like to play only the section currently zoomed print("Left xlim of ax1: ", xlim_left_ax1) print("Right xlim of ax1: ", xlim_right_ax1) start_sample=round(xlim_left_ax1*s) end_sample=round(xlim_right_ax1*s) print("Play will start from sample: ", start_sample) print("Play will end at sample: ", end_sample) array_to_play=a[start_sample:end_sample] sd.play(array_to_play, s) def stop_playing(event): sd.stop() axstart = fig.add_axes([0.925, 0.2, 0.05, 0.04]) bstart = Button(axstart, 'PLAY', color='green', hovercolor='green') bstart.on_clicked(PlayPlottedWave.start_playing) axstop = fig.add_axes([0.925, 0.15, 0.05, 0.04]) bstop = Button(axstop, 'STOP', color='red', hovercolor='red') bstop.on_clicked(PlayPlottedWave.stop_playing) #=================== #end added 13 July #=================== plt.show()